clear
capture restore
set scheme cleanplots, perm

* SET DIRECTORIES
global home "..."

global data "$home/Data/"
global output "$home/Output/"

cd $home

********************************************************************************	  
********************************************************************************
*3. Read-in Quarterly Compustat Data and Clean
*
* 	This section creates our main quarterly Compustat dataset, merging on the 
* 	data created in the above sections. 
********************************************************************************
********************************************************************************
********************************************************************************
use "${data}compustat_extended_sample_raw", clear

*Destring datacqtr to get year and quarter*
gen year_str = substr(datacqtr,1,4)
gen qtr_str  = substr(datacqtr,6,1)
destring year_str, gen(year)
destring qtr_str, gen(qtr)
drop year_str qtr_str datacqtr

*******************
*Merge on annual Peters and Taylor (2016) intangible capital data
gen fyear = fyearq
merge m:1 gvkey fyear using "${data}totalQ.dta", keepusing(gvkey fyear K_int K_int_offBS K_int_Know K_int_Org q_tot)
drop if _merge==2
drop _merge
drop fyear


*******************
***DATA CLEANING***
*******************
*Drop unnecessary variables
drop indfmt consol popsrc datafmt tic conm curcdq datafqtr fyearq exchg cik costat incorp datadate

order gvkey year qtr  

*Create timeseries variables
*Create numeric firm id
destring gvkey, gen(firmid)
*Create quarterly time variable
gen ts_qtr = tq(1988q1) + (year-1988)*4 + (qtr - 1)
format ts_qtr %tq

*Drop exact duplicates.
egen date_tag = tag(firmid ts_qtr)
drop if date_tag==0
drop date_tag

*Drop FIRE firms and public administration firms (or if missing sic)
destring sic, replace
*drop if sic>=6000 & sic<=6999
*drop if sic>=9100

*Only keep firms incorporated in the U.S.
keep if fic=="USA"

*Drop firm-quarters with acquisitions greater than 5% of assets
*NOTE: aqcy (acquisition value) is a year-to-date variable by fiscal quarter, not quarterly
*Create quarterly acquisition value variable
xtset firmid ts_qtr 

gen 	aqcq = 0
replace aqcq = aqcy 		 if fqtr==1 & !missing(aqcy)
replace aqcq = aqcy - L.aqcy if fqtr!=1 & !missing(L.aqcy) & !missing(aqcy)

gen 	aqcq_to_assets = aqcq / atq

drop if aqcq_to_assets>0.05 & !missing(atq)

*Drop 901 observations where assets and/or liabilities equal zero or negative
drop if atq<=0|ltq<=0

*Drop observations with missing accounting variables
drop if missing(atq)|missing(ltq)|missing(seqq)

*Drop if firm violates accounting identity by more than 10% of book value of assets
gen at_min_lt_min_seq = atq - ltq - seqq
gen acct_error_pct = at_min_lt_min_seq / atq
sum acct_error_pct, detail
gen flag = (acct_error_pct > .1 | acct_error_pct < -.1)
tab flag, missing
*Drop 14,887 observations that violate accounting identify 
drop if flag
drop at_min_lt_min_seq acct_error_pct flag

*Merge on quarterly MP shock data
*Note: 2019 Q3 and Q4 don't have MP shock data available yet
merge m:1 year qtr using "${data}MP_shocks_quarterly"
drop if _merge==2
drop _merge

merge m:1 year qtr using "${data}MP_shocks_daily_quarterly"
drop if _merge==2
drop _merge

drop pre_crisis post_crisis crisis_flg

*Create time period flags
gen 	pre_crisis = (ts_qtr<=193)

gen 	post_crisis = (ts_qtr>=198)

gen 	crisis_flg = (ts_qtr>193 & ts_qtr<198)

label var pre_crisis "1=July 1991 - June 2008"
label var post_crisis "1=July 2009 - June 2019"
label var crisis_flg "1=financial crisis date (July2008-June2009)"

order gvkey firmid year qtr pre_crisis post_crisis crisis_flg

*Merge on firm age
merge m:1 gvkey year using "${data}firm_age_data"
drop if _merge==2
drop _merge

*Create top-coded version of age variable
gen 	age_capped = age
replace age_capped = 48 if age>48 & !missing(age)

label var age_capped "Age, top-coded at 48yrs"

*Set stockholders equity to zero if negative (as in Hadlock & Pierce) to give D2C=1
replace seqq=0 if seqq<0

replace cheq=0 if cheq<0

*******************************
***CREATE LEVERAGE VARIABLES***
*******************************
gen lev_d2a = (dlcq + dlttq) / atq
label var lev_d2a "Leverage, debt to assets, quarterly"

gen lev_d2c = (dlcq + dlttq)/(seqq+dlcq+dlttq)
label var lev_d2c "Leverage, debt to capital, quarterly"

gen lev_d2e = (dlcq + dlttq)/(seqq)
label var lev_d2e "Leverage, debt to equity, quarterly"

*Create share of Total Debt by LT and ST
gen lt_debt_share = dlttq / (dlttq + dlcq)
gen st_debt_share = dlcq / (dlttq + dlcq)

xtset firmid ts_qtr

*Merge on Price Index data to create real sales growth
*Table 1.3.4. Price Indexes for Gross Value Added by Sector (Non-Farm Business Index)
merge m:1 year qtr using "${data}bea_nipa_price_index_extended"
*Drop dates outside of sample range
drop if _merge==1
drop if _merge==2
drop _merge

*Create tangible assets
gen tanq = atq - intanq

label var tanq "Tangible Assets"

replace xrdq=0 if missing(xrdq)

gen K_int_onBS = K_int - K_int_offBS

***Create quarterly capital expenditure variable
xtset

gen 	capxq = capxy if fqtr==1
replace capxq = D.capxy if fqtr!=1
replace capxq = . if missing(fqtr)

***Adjust capital stock variables by price level
replace ppentq=ppentq/(price_index/100)
replace ppegtq=ppegtq/(price_index/100)
replace capxq=capxq/(price_index/100)

***Deflate select variables
gen niq_real=niq/(price_index/100)
gen ivltq_real=ivltq/(price_index/100)
*gen ivstq_real=ivstq/(price_index/100)
gen invtq_real=invtq/(price_index/100)
gen intanq_real=intanq/(price_index/100)
gen tanq_real=tanq/(price_index/100)
gen xrdq_real=xrdq/(price_index/100)
gen saleq_real=saleq/(price_index/100)
gen K_int_real=K_int/(price_index/100)
gen K_int_offBS_real=K_int_offBS/(price_index/100)
gen K_int_onBS_real=K_int_onBS/(price_index/100)
gen K_int_Know_real=K_int_Know/(price_index/100)
gen K_int_Org_real=K_int_Org/(price_index/100)

*********************
***Create Controls***
*********************
xtset firmid ts_qtr

*Real Log Sales Growth
gen log_salesg_real = (ln(saleq / (price_index/100)) - ln(L4.saleq / (L4.price_index/100))) * 100
label var log_salesg_real "Log Sales Growth, YoY, deflated by price_index"

*Size (log assets)
gen size_assets = ln(atq/(price_index/100))
label var size_assets "log of total assets, quarterly"

*Current Assets as a Share of Total Assets
gen current_total_orig = actq / atq
gen current_total = 1 - (actq / atq)
label var current_total_orig "Current Assets as a Share of Total Assets"
label var current_total "1 - Current Assets as a Share of Total Assets"

gen liquidity_neg 		= 1 - (cheq / atq)
gen liquidity 			= cheq / atq

label var fqtr "Fiscal Quarter"

*Price-to-Cost margin
gen pcmq = (saleq - cogsq) / saleq
label var pcmq 	"Price-to-Cost Margin, quarterly"

*Receivables minus Payables to Sales
gen recpay2yq = (rectq - apq) / saleq
label var recpay2yq "Receivables minus Payables to Sales, quarterly"

*Dpreciation to Assets
gen d2aq = dpq / atq
label var d2aq 				"Depreciation to Assets, quarterly"

*Cash 2 Sales
gen cash2sales = (saleq - cond(missing(cogsq),0,cogsq) - cond(missing(xsgaq),0,xsgaq)) / saleq
label var cash2sales "Sales minus Cost of Goods Sold minus Selling Expenses over Sales, qtrly"

*Quarterly market_cap variable
*Note: Shares Outstanding is missing pre-1998 from daily Compustat/CRSP share data. 
*So use the quarterly market capitalization calculated from Compustat/CRSP dataset.
gen market_cap_q = (cshoq * prccq * 100) / price_index
gen ln_market_cap_q = ln(market_cap_q)

*Create Tobin's q variable
gen tobins_q = (atq + (prccq * cshoq) - ceqq - txditcq) / atq

label var tobins_q "Tobin's Q, quarterly"

*Winsorize tobins_q at 1% tails
sum tobins_q, detail
gen 	tobins_q_wins = tobins_q
replace tobins_q_wins = r(p1) if tobins_q<r(p1)
replace tobins_q_wins = r(p99) if tobins_q > r(p99) & !missing(tobins_q)

merge m:1 firmid qtr year using "${data}distance2default_qtrly"
drop if _merge==2
drop _merge

*Full Period standardization
sum edf
gen edf_std = (edf - r(mean)) / r(sd)

label var edf "Expected Default Frequency"

gen dd = -invnormal(edf)

label var dd "Distance to Default"

xtset firmid ts_qtr
***Standardize quarterly Compustat variables
foreach i in log_salesg_real size_assets current_total_orig current_total liquidity_neg liquidity pcmq recpay2yq d2aq cash2sales  ln_market_cap_q tobins_q lev_d2a lev_d2c lev_d2e lt_debt_share st_debt_share cshoq dd {

	gen `i'_1lag = L.`i'
	
	*Winsorize at 1% tails
	sum `i'_1lag, detail
	replace `i'_1lag = r(p1) if `i'_1lag < r(p1)
	replace `i'_1lag = r(p99) if `i'_1lag > r(p99) & !missing(`i'_1lag)
	
	*Full Period standardization
	sum `i'_1lag
	gen `i'_1lag_std = (`i'_1lag - r(mean)) / r(sd)
	
}

*Drop variables
drop pcmq-cash2sales ln_market_cap_q lev_d2a-st_debt_share

*Create industry variables
gen sic_2dig = floor(sic/100)

gen sic_agric 		 = 0 if sic_2dig!=.
gen sic_mining 		 = 0 if sic_2dig!=.
gen sic_construction = 0 if sic_2dig!=.
gen sic_manuf 		 = 0 if sic_2dig!=.
gen sic_transport 	 = 0 if sic_2dig!=.
gen sic_wholesale 	 = 0 if sic_2dig!=.
gen sic_retail 		 = 0 if sic_2dig!=.
gen sic_fire 	 	 = 0 if sic_2dig!=.
gen sic_services 	 = 0 if sic_2dig!=.

replace sic_agric 		 = 1 if sic_2dig<10
replace sic_mining 		 = 1 if sic_2dig>=10 & sic_2dig<=14
replace sic_construction = 1 if sic_2dig>=15 & sic_2dig<=17
replace sic_manuf 		 = 1 if sic_2dig>=20 & sic_2dig<=39
replace sic_transport 	 = 1 if sic_2dig>=40 & sic_2dig<=49
replace sic_wholesale 	 = 1 if sic_2dig>=50 & sic_2dig<=51
replace sic_retail 		 = 1 if sic_2dig>=52 & sic_2dig<=59
replace sic_fire 		 = 1 if sic_2dig>=60 & sic_2dig<=67
replace sic_services 	 = 1 if sic_2dig>=70 & sic_2dig<=89

gen 	sector = 1 if sic_agric==1
replace sector = 2 if sic_mining==1
replace sector = 3 if sic_construction==1
replace sector = 4 if sic_manuf==1
replace sector = 5 if sic_transport==1
replace sector = 6 if sic_wholesale==1
replace sector = 7 if sic_retail==1
replace sector = 8 if sic_services==1
replace sector = 9 if sic_fire==1

*Create value label for sector variable
label define sector_lbl 1 "Agriculture" 2 "Mining" 3 "Construction" 4 "Manufacturing" 5 "Transportation" 6 "Wholesale" 7 "Retail" 8 "Services" 9 "FIRE"
label values sector sector_lbl

*Create a qtr x sector FE, as in Ottonello & Winberry
egen qtr_industry = group(ts_qtr sector)

*Create quarterly Compustat variables from year-to-date versions
gen 	cdvcq = cdvcy if fqtr==1
replace cdvcq = cdvcy - L.cdvcy if fqtr>1

gen 	ibq = iby if fqtr==1
replace ibq = iby - L.iby if fqtr>1

gen 	pdvcq = pdvcy if fqtr==1
replace pdvcq = pdvcy - L.pdvcy if fqtr>1

*Drop dates outside to our sample period
*drop if year<1995 | year>2020

sort gvkey year qtr

label var lev_d2c_1lag "Leverage, debt to capital"
label var lev_d2a_1lag "Leverage, debt to assets"
label var lev_d2e_1lag "Leverage, debt to equity"
label var log_salesg_real_1lag "Log Real Sales Growth, YoY"
label var size_assets_1lag "Log of Total Assets"
label var current_total_1lag "1 - Current Assets as Share of Total Assets"
label var current_total_orig_1lag "Current Assets as Share of Total Assets"
label var liquidity_1lag "Cash as Share of Total Assets"
label var liquidity_neg_1lag "1 - Cash as Share of Total Assets"
label var pcmq_1lag "Price-to-Cost Margin"
label var recpay2yq_1lag "Receivables minus Payables to Sales"
label var d2aq_1lag "Depreciation to Assets"
label var cash2sales_1lag "Sales minus Cost of Goods Sold minus Selling Expenses over Sales"
label var ln_market_cap_q_1lag "Log of Market Cap"
label var tobins_q_1lag "Tobin's q, winsorized at 1% tails"


**************************************************
*Create investment variables*
**************************************************
xtset firmid ts_qtr 

by firmid: ipolate capxq ts_qtr, gen(capxq_ipolate)
xtset firmid ts_qtr 
gen capex_rate = capxq_ipolate/L.ppentq

*Create Capital Stock variable following Ottonello & Winberry methodology
*First, need to flag the first date that each firm reports their capital stock
gen 	capital_flag = 0
replace capital_flag = 1 if (ppegtq!=. & L.ppentq!=. & F.ppentq!=.) | (ppegtq!=. & ppentq!=. & F.ppentq!=.)

bysort firmid capital_flag: egen capital_date_temp = min(ts_qtr) if capital_flag==1
bysort firmid: egen capital_date = max(capital_date_temp)
format capital_date %tq

*Next, need to interpolate any missing net investment values between two non-missing values
sort firmid ts_qtr
gen 	ppentq_fill = ppentq
replace ppentq_fill = (L.ppentq + F.ppentq)/2 if L.ppentq!=. & F.ppentq!=. & ppentq==.

*Initialize capital stock value with first date that records the total capital 
*Then update capital stock with change in net capital stock
*If more than 2 consecutive quarters don't report net capital stock, then start
*again with gross capital stock value
gen 	capital_stock = ppegtq if ts_qtr==capital_date
replace capital_stock = L.capital_stock + D.ppentq_fill if ts_qtr!=capital_date
replace capital_stock = ppegtq if capital_stock==.
replace capital_stock = L.capital_stock + D.ppentq_fill if ts_qtr!=capital_date & L.capital_stock!=.

label var capital_stock "Capital Stock, quarterly"
drop capital_flag capital_date_temp capital_date 

*Create investment variable as percentage change in capital stock
gen investment_intensive = (log(capital_stock) - log(L.capital_stock)) * 100

label var investment_intensive "Log change in capital stock"

*Drop variables
drop fyear fyr seqq-fic aqcq aqcq_to_assets

*Order variables
order gvkey firmid cusip year qtr pre_crisis post_crisis crisis_flg capital_stock investment_intensive

*Create 8-digit CUSIP consistent with ivol dataset
gen cusip_8 = substr(cusip,1,8)

gen K_tot = capital_stock + K_int_real

*merge on P&T quarterly variables
merge 1:1 gvkey year qtr using "${data}PT_quarterly"
drop if _merge==2
drop _merge

destring gvkey, gen(gvkey_str)
drop gvkey
rename gvkey_str gvkey

merge 1:1 gvkey year qtr using "${data}qtrly_ivol_data"
drop if _merge==2
drop _merge

merge m:1 year qtr using "${data}agg_econ_vars_extended"
drop if _merge==2
drop _merge

*Save .dta file 
save "${data}Compustat_Quarterly_extended", replace

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

